Warning: package 'tidycensus' was built under R version 4.4.1
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)
here() starts at /Users/jocardelle/MEDS/Fall_24/EDS-222-statistics/final-project/nyc-trees
library(stars)
Loading required package: abind
Warning: package 'abind' was built under R version 4.4.1
Loading required package: sf
Warning: package 'sf' was built under R version 4.4.1
Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
# One library() call per line; fused on a single line they do not parse.
library(tidyr)
library(dplyr)
library(units)
udunits database from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/units/share/udunits/udunits2.xml
library(mapview)
# Hypothesis graph ----
# Illustrative straight line (slope 1/3); the axis tick labels are blanked
# below, so only the direction of the relationship is shown.
df <- data.frame(x = 1:100)
df$y <- 1 / 3 * df$x

ggplot(df, aes(x, y)) +
  geom_line() +
  # Zoom with coord_cartesian() instead of xlim()/ylim(): scale limits drop
  # the rows outside the range and emit a "Removed 50 rows" warning.
  coord_cartesian(xlim = c(0, 50), ylim = c(0, 50)) +
  labs(
    title = "Trees and Income in NYC Hypothesis",
    x = "median income($)",
    y = expression(paste("tree per ", km^2))
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank()
  )
Warning: Removed 50 rows containing missing values or values outside the scale range
(`geom_line()`).
# Read in income for New York City census tracts ----
# Requests variable B19013_001 at tract level for the five listed counties,
# with tract geometry attached (geometry = TRUE) so the result can be mapped
# and spatially joined. year = 2015 resolves to the 2011-2015 5-year ACS.
nyc <- get_acs(
  state = "NY",
  county = c("Bronx", "Kings", "New York", "Richmond", "Queens"),
  geography = "tract",
  variables = "B19013_001",
  geometry = TRUE,
  year = 2015
)
Getting data from the 2011-2015 5-year ACS
Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
# Read in tree data ----
# The path is resolved relative to the project root via here().
nyc_trees <- read_csv(here("data/2015StreetTreesCensus_TREES.csv"))
Rows: 683788 Columns: 42
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (28): created_at, the_geom, curb_loc, status, health, spc_latin, spc_com...
dbl (14): tree_id, block_id, tree_dbh, stump_diam, cb_num, borocode, cncldis...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Check the distinct values of the trees' status column
unique(nyc_trees$status)
[1] "Alive" "Dead" "Stump"
# Filter to trees that are alive (drops the "Dead" and "Stump" records)
nyc_trees_alive <- nyc_trees %>%
  filter(status == "Alive")
# Add a new column to nyc with the area of each census tract in km^2.
# set_units() performs the unit conversion explicitly, so the result does not
# depend on st_area() happening to return square metres; as.numeric() then
# strips the units class so the column is a plain double.
nyc <- nyc %>%
  mutate(area_km2 = as.numeric(units::set_units(st_area(geometry), km^2)))

# Map income by census tract
mapview(
  nyc,
  zcol = "estimate",
  layer.name = "Median income ($)"
)
# Make the trees data set into an sf object and set its CRS to match nyc.
# NOTE(review): `crs =` only labels the coordinates, it does not transform
# them -- confirm the longitude/Latitude columns use the same datum as nyc.
nyc_trees_sf <- st_as_sf(
  nyc_trees_alive,
  coords = c("longitude", "Latitude"),
  crs = st_crs(nyc)
)

# Join trees to census tracts with st_within and count trees in each tract.
# Only GEOID is needed from nyc here, so the other tract columns are not
# copied onto every tree row before summarising.
nyc_trees_income <- nyc_trees_sf %>%
  st_join(select(nyc, GEOID), join = st_within) %>%
  group_by(GEOID) %>%
  summarize(tree_count = n())

# Add the tree counts back to the income data by tract ID.
# The counts are already keyed by GEOID, so a keyed left_join is used rather
# than a second spatial join: re-matching on geometry can pair a tract with a
# neighbouring tract's trees and duplicate rows. Trees that fell in no tract
# carry GEOID = NA and match nothing; tracts with no trees keep tree_count = NA.
# GEOID is kept in the result as the tract identifier.
treecount_income <- nyc %>%
  left_join(st_drop_geometry(nyc_trees_income), by = "GEOID")

# Add tree per km2 column
treecount_income <- treecount_income %>%
  mutate(tree_per_km2 = tree_count / area_km2)

# Map trees per km2
mapview(
  treecount_income,
  zcol = "tree_per_km2",
  layer.name = "trees per square kilometer"
)
# Scatter plot of tree density against tract income with a fitted linear
# trend line. Rows with missing income or tree density are dropped by ggplot2
# with a warning.
ggplot(data = treecount_income) +
  aes(x = estimate, y = tree_per_km2) +
  geom_point() +
  geom_smooth(method = 'lm') +
  labs(
    title = "Trees and Income in NYC",
    x = "median income($)",
    y = expression(paste("tree per ", km^2))
  )
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 67 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 67 rows containing missing values or values outside the scale range
(`geom_point()`).